卡方分析
在显著性水平α=0.05下
“Work_Authorisation”、“Employment_type”和”Studio”变量的p值小于α
说明此三个变量与员工离入职情况之间存在显著关联,有可能是决定员工离职的重要因素,因此对此变量展开进一步研究
离入职时间
可以发现最早入职的员工为2011年3月入职,最晚入职的员工为2019年9月入职
有47位员工离职,离职员工中最早离职时间为2019年1月3日,最晚离职时间为2019年8月16日,平均离职时间为2019年3月30日,最早入职时间为2011年3月15日,最晚入职时间为2018年12月19日,平均入职时间为2015年7月29日,截至2019年12月31日,在职时间最长为3072天,最短为95天,平均在职时长为1340天
有93为员工新入职,入职员工最早入职为2019年1月2日,最晚入职为2019年9月25日,截至2019年12月31日,在职时长最长为363天,最短为97天,平均在职时长为223天
截止2019年12月31日所有离在职员工平均入职时间为598天
library(caret) set.seed(2021212254) df_all\(Type_of_movement <- as.factor(df_all\)Type_of_movement) sample <- sample(c(TRUE, FALSE), nrow(df_all), replace=TRUE, prob=c(0.7,0.3)) train <- df_all[sample, ] test <- df_all[!sample, ] logic_model <- glm(Type_of_movement ~ Work_Authorisation + Employment_type + Studio + area + Country + Date_of_work + Client + Manager_EMP_ID + Org + Job_Title, data = train, family = “binomial”) model2<-step(object = logic_model,trace = 0)
Dependent variable:
Type_of_movement
Work_AuthorisationPermanent Resident 3.864***
(1.491)
Work_Authorisationunknown 21.890
(5,184.903)
Work_AuthorisationWork Permit -16.837
(4,941.803)
Date_of_work 0.012***
(0.004)
Constant -6.444***
(1.631)
Observations 94
Log Likelihood -10.552
Akaike Inf. Crit. 31.104
—————————————————————————————————————————————————————————————————————
模型的 McFadden R方为 0.8249651
得到结论
变量均显著,模型通过检验且有效
McFadden R方为0.8249,模型拟合性能尚可
通过系数大小可以发现,Work_Authorisation中的’work permit’降低员工离职的可能性,而Work_Authorisation中的Permanent Resident 、unknown 以及在职时长Date_of_work 均会增加员工离职可能,其中Work_Authorisation为unknown对员工离职影响大于Permanent Resident
且AUC值为0.88,较为合理,能够较为准确预测员工是否将会离职
---
title: "人力资源变动"
output:
flexdashboard::flex_dashboard:
storyboard: true
social: menu
orientation: rows
vertical_layout: scroll
source: embed
theme: journal
---
```{=html}
<style>
.navbar {
background-color:#D87A80;
border-color:white;
}
.navbar-brand {
color:white!important;
}
</style>
```
```{R}
rm=list()
library(flexdashboard)
library(highcharter)
library(bbplot)
library(ggplot2)
library(dplyr)
library(gapminder)
library(gganimate)
library(ggalt)
library(tidyr)
library(tidyverse)
library(readxl)
library(sparkline)
library(formattable)
library(fontawesome)
library(psych)
library(sf)
library(ggalluvial)
library(shiny)
library(shinydashboard)
library(networkD3)
library(data.table)
library(ggalluvial)
library(networkD3)
library(flexdashboard)
library(readxl)
library(dplyr)
library(tidyr)
library(tidyverse)
library(leaflet)
library(highcharter)
require(knitr)
library(jsonlite)
library(sf)
library(forcats)
library(purrr)
library(reactablefmtr)
country_position <- read_excel('country coord.xlsx')
df_americas<-read_excel('HR Dashboard_v1 - Americas.xlsx')
df_americas['area']<-'americas'
df_apac<-read_excel('HR Dashboard_v1 - APAC.xlsx')
df_apac['area']<-'apac'
df_europe<-read_excel('HR Dashboard_v1 - Europe.xlsx')
df_europe['area']<-'europe'
df_all<-rbind(df_apac,df_europe)%>%rbind(df_americas)
#df_all<-df_all[,-2]
colnames(df_all) <- gsub(" ", "_", names(df_all))
rm(df_americas,df_apac,df_europe)
df_all<-df_all%>% left_join(country_position,by=c('Country'='name'))
df_all<-df_all%>%select(-country,-Sr._No.)
df_all$Date_of_Exit[df_all$Date_of_Exit == '-'] =0
df_all <- df_all %>% mutate(Date_of_Exit = ymd(as.Date(as.numeric(Date_of_Exit), origin = "1899-12-30")),Date_of_Hire=ymd(Date_of_Hire),EMP_ID=as.character(EMP_ID),Manager_EMP_ID=as.character(Manager_EMP_ID))
df_all$Date_of_Exit[df_all$Date_of_Exit == ymd('1899-12-30')]=ymd('0000-00-00')
df_all$Reason_for_Leaving[df_all$Reason_for_Leaving == '-'] <- NA
df_all$Attrition_Type[df_all$Attrition_Type == '-'] <- NA
df_all$Work_Authorisation[is.na(df_all$Work_Authorisation)]<-'unknown'
df_all<- df_all%>%mutate()
df_all<-df_all%>%mutate(Date_of_work=ifelse(is.na(Date_of_Exit),ymd('2019-12-31')-Date_of_Hire,Date_of_Exit-Date_of_Hire))
```
# 总览 {data-icon="fa-user-alt"}
## Row {data-width=150}
### 新进总人数
```{r}
library(flexdashboard)
library(readxl)
library(dplyr)
library(tidyr)
library(tidyverse)
library(leaflet)
library(highcharter)
require(knitr)
library(jsonlite)
library(sf)
library(forcats)
library(purrr)
library(flexdashboard)
library(readxl)
library(dplyr)
library(tidyr)
library(tidyverse)
library(leaflet)
library(highcharter)
require(knitr)
library(jsonlite)
library(sf)
library(forcats)
library(purrr)
#读入数据
USA_data3<-read_excel("HR Dashboard_v1 - Americas.xlsx","data")
APAC_data3<-read_excel("HR Dashboard_v1 - APAC.xlsx","data")
EU_data3<-read_excel("HR Dashboard_v1 - Europe.xlsx","data")
country_coor3<-read_excel("country coord.xlsx")
#数据预处理
##清除无用数据
America3<-USA_data3[,c(-1,-4,-5,-6,-7,-14,-16)]
APAC3<-APAC_data3[,c(-1,-4,-5,-6,-7,-14,-16)]
EU3<-EU_data3[,c(-1,-4,-5,-6,-7,-14,-16)]
##绘制新表:总表、离职表、入职表
global3<-rbind(America3,APAC3,EU3) #总表
colnames(global3)[10]<-'movement_type'
dplyr::filter(global3,movement_type=="Entry")->entry3 #入职表
dplyr::filter(global3,movement_type=="Exit")->exit3 #离职表
##新表处理
global_1=global3 %>% group_by(Country) %>% summarise(count=n())
global_1=left_join(global_1,country_coor3[,2:4],by=c("Country"="name"))
exit_1=exit3 %>% group_by(Country) %>% summarise(count=n())
exit_1=left_join(exit_1,country_coor3[,2:4],by=c("Country"="name"))
entry_1=entry3 %>% group_by(Country) %>% summarise(count=n())
entry_1=left_join(entry_1,country_coor3[,2:4],by=c("Country"="name"))
colnames(America3)[10]<-'movement_type'
number1=America3 %>% group_by(movement_type) %>%
summarise(count=n())
number1$location<-c("America","America")
colnames(APAC3)[10]<-'movement_type'
number2=APAC3 %>% group_by(movement_type) %>%
summarise(count=n())
number2$location<-c("APAC","APAC")
colnames(EU3)[10]<-'movement_type'
number3=EU3 %>% group_by(movement_type) %>%
summarise(count=n())
number3$location<-c("EU","EU")
```
<!-- # page1 {.storyboard,data-icon="fa-user-alt"} -->
```{r}
newhires=entry3 %>% nrow()
flexdashboard::valueBox(value = newhires,icon = "fa-user-plus",caption = "新进人员",color = "#FFB980")
```
### 离职总人数
```{r}
exits=exit3 %>% nrow()
flexdashboard::valueBox(value = exits,icon = "fa-user-minus",caption = "离职人员", color = "#5AB1EF")
```
### 净变动
```{r}
netchange=(newhires-exits)
# If loop to have either up-arrow or down-arrow icon on valuebox based on the value of netchange
if(netchange>0){
flexdashboard::valueBox(value = netchange,icon = "fa-arrow-up",caption = "净变动", color = "#2EC7C9")
} else{
flexdashboard::valueBox(value = netchange,icon = "fa-arrow-down",caption = "净变动", color = "lightblue")}
```
## Row {data-height="300"}
### americas离职变动情况
```{r}
#读入数据
USA_data3<-read_excel("HR Dashboard_v1 - Americas.xlsx","data")
APAC_data3<-read_excel("HR Dashboard_v1 - APAC.xlsx","data")
EU_data3<-read_excel("HR Dashboard_v1 - Europe.xlsx","data")
country_coor3<-read_excel("country coord.xlsx")
#数据预处理
##清除无用数据
America3<-USA_data3[,c(-1,-4,-5,-6,-7,-14,-16)]
APAC3<-APAC_data3[,c(-1,-4,-5,-6,-7,-14,-16)]
EU3<-EU_data3[,c(-1,-4,-5,-6,-7,-14,-16)]
##绘制新表:总表、离职表、入职表
global3<-rbind(America3,APAC3,EU3) #总表
colnames(global3)[10]<-'movement_type'
dplyr::filter(global3,movement_type=="Entry")->entry3 #入职表
dplyr::filter(global3,movement_type=="Exit")->exit3 #离职表
##新表处理
global_1=global3 %>% group_by(Country) %>% summarise(count=n())
global_1=left_join(global_1,country_coor3[,2:4],by=c("Country"="name"))
exit_1=exit3 %>% group_by(Country) %>% summarise(count=n())
exit_1=left_join(exit_1,country_coor3[,2:4],by=c("Country"="name"))
entry_1=entry3 %>% group_by(Country) %>% summarise(count=n())
entry_1=left_join(entry_1,country_coor3[,2:4],by=c("Country"="name"))
colnames(America3)[10]<-'movement_type'
number1=America3 %>% group_by(movement_type) %>%
summarise(count=n())
number1$location<-c("America","America")
colnames(APAC3)[10]<-'movement_type'
number2=APAC3 %>% group_by(movement_type) %>%
summarise(count=n())
number2$location<-c("APAC","APAC")
colnames(EU3)[10]<-'movement_type'
number3=EU3 %>% group_by(movement_type) %>%
summarise(count=n())
number3$location<-c("EU","EU")
### America离职变动情况
entry_americas <- df_all %>% dplyr::filter(area == 'americas')
movement <- entry_americas %>%dplyr::count(Type_of_movement)
colnames(movement) <- c('name', 'y')
highchart() %>%
hc_chart(type = "pie") %>%
hc_title(text = "Americas离入职人员分布") %>%
hc_add_series(data = movement,
innerSize = '60%',
colorByPoint = TRUE,
colors = c('#D87A80', '#807DBA')) %>%
hc_tooltip(pointFormat = '{series.name}: <b>{point.percentage:.1f}%</b>') %>%
hc_title(style = list(fontFamily = "SourceHanSerifSC-Medium")) %>%
hc_subtitle(style = list(fontFamily = "SourceHanSerifSC-Medium")) %>%
hc_add_theme(hc_theme_flat()) %>%
hc_plotOptions(pie = list(
allowPointSelect = TRUE,
cursor = 'pointer',
dataLabels = list(
enabled = TRUE,
format = '<b>{point.name}</b>: {point.percentage:.1f} %'
),
showInLegend = TRUE
))
```
### apac离职变动情况
```{r}
entry_apac <- df_all %>% dplyr::filter(area == 'apac')
movement <- entry_apac %>%dplyr::count(Type_of_movement)
colnames(movement) <- c('name', 'y')
highchart() %>%
hc_chart(type = "pie") %>%
hc_title(text = "apac离入职人员分布") %>%
hc_add_series(data = movement,
innerSize = '60%',
colorByPoint = TRUE,
colors = c('#D87A80', '#807DBA')) %>%
hc_tooltip(pointFormat = '{series.name}: <b>{point.percentage:.1f}%</b>') %>%
hc_title(style = list(fontFamily = "SourceHanSerifSC-Medium")) %>%
hc_subtitle(style = list(fontFamily = "SourceHanSerifSC-Medium")) %>%
hc_add_theme(hc_theme_flat()) %>%
hc_plotOptions(pie = list(
allowPointSelect = TRUE,
cursor = 'pointer',
dataLabels = list(
enabled = TRUE,
format = '<b>{point.name}</b>: {point.percentage:.1f} %'
),
showInLegend = TRUE
))
```
### europe离职变动情况
```{r}
entry_europe <- df_all %>% dplyr::filter(area == 'europe')
movement <- entry_europe %>%dplyr::count(Type_of_movement)
colnames(movement) <- c('name', 'y')
highchart() %>%
hc_chart(type = "pie") %>%
hc_title(text = "europe离入职人员分布") %>%
hc_add_series(data = movement,
innerSize = '60%',
colorByPoint = TRUE,
colors = c('#D87A80', '#807DBA')) %>%
hc_tooltip(pointFormat = '{series.name}: <b>{point.percentage:.1f}%</b>') %>%
hc_title(style = list(fontFamily = "SourceHanSerifSC-Medium")) %>%
hc_subtitle(style = list(fontFamily = "SourceHanSerifSC-Medium")) %>%
hc_add_theme(hc_theme_flat()) %>%
hc_plotOptions(pie = list(
allowPointSelect = TRUE,
cursor = 'pointer',
dataLabels = list(
enabled = TRUE,
format = '<b>{point.name}</b>: {point.percentage:.1f} %'
),
showInLegend = TRUE
))
```
## Row {data-height = 200}
### 各国人员入离职情况
```{r}
library(leaflet)
# 读入数据
USA_data3 <- read_excel('HR Dashboard_v1 - Americas.xlsx', "data")
APAC_data3 <- read_excel('HR Dashboard_v1 - APAC.xlsx', "data")
EU_data3 <- read_excel('HR Dashboard_v1 - Europe.xlsx', "data")
country_coor3 <- read_excel('country coord.xlsx')
# 数据预处理
America3 <- USA_data3[, c(-1, -4, -5, -6, -7, -14, -16)]
APAC3 <- APAC_data3[, c(-1, -4, -5, -6, -7, -14, -16)]
EU3 <- EU_data3[, c(-1, -4, -5, -6, -7, -14, -16)]
global3 <- rbind(America3, APAC3, EU3)
colnames(global3)[10] <- 'movement_type'
entry3 <- dplyr::filter(global3, movement_type == "Entry")
exit3 <- dplyr::filter(global3, movement_type == "Exit")
# 新表处理
global_1 <- global3 %>% group_by(Country) %>% summarise(count = n())
global_1 <- left_join(global_1, country_coor3[, 2:4], by = c("Country" = "name"))
exit_1 <- exit3 %>% group_by(Country) %>% summarise(count = n())
exit_1 <- left_join(exit_1, country_coor3[, 2:4], by = c("Country" = "name"))
entry_1 <- entry3 %>% group_by(Country) %>% summarise(count = n())
entry_1 <- left_join(entry_1, country_coor3[, 2:4], by = c("Country" = "name"))
# 地图可视化
global_1$entrycount <- c(7, 10, 1, 4, 20, 35, 16)
global_1$exitcount <- c(0, 5, 0, 0, 15, 15, 12)
globallabels <- sprintf("%s<br/>entry %d<br/>exit%d", global_1$Country, global_1$entrycount, global_1$exitcount) %>%
lapply(htmltools::HTML)
entrylabels <- sprintf("%s<br/>entry %d", entry_1$Country, entry_1$count) %>%
lapply(htmltools::HTML)
exitlabels <- sprintf("%s<br/>exit %d", exit_1$Country, exit_1$count) %>%
lapply(htmltools::HTML)
map <- leaflet(data = global_1) %>% addTiles() %>%
addMarkers(
~longitude, ~latitude,
label = globallabels,
labelOptions(
style = list("font-weight" = "normal", padding = "3px 8px"),
textsize = "15px",
direction = "auto"
),
group = "global_1"
) %>%
addCircles(~longitude,~latitude,color="blue",weight=global_1$exitcount/1,group="exit_1")%>%
addLayersControl(overlayGroup=c("global_1","entry_1","exit_1"),options = layersControlOptions(collapsed = FALSE))
map
```
### 月变动情况
```{r}
# 加载所需的包
library(readxl)
library(dplyr)
library(ggplot2)
library(plotly)
# 读入数据
USA_data3 <- read_excel('HR Dashboard_v1 - Americas.xlsx', "data")
APAC_data3 <- read_excel('HR Dashboard_v1 - APAC.xlsx', "data")
EU_data3 <- read_excel('HR Dashboard_v1 - Europe.xlsx', "data")
country_coor3 <- read_excel('country coord.xlsx')
# 数据预处理
America3 <- USA_data3[, c(-1, -4, -5, -6, -7, -14, -16)]
APAC3 <- APAC_data3[, c(-1, -4, -5, -6, -7, -14, -16)]
EU3 <- EU_data3[, c(-1, -4, -5, -6, -7, -14, -16)]
global3 <- rbind(America3, APAC3, EU3)
colnames(global3)[10] <- 'movement_type'
entry3 <- dplyr::filter(global3, movement_type == "Entry")
exit3 <- dplyr::filter(global3, movement_type == "Exit")
# 新表处理
global_1 <- global3 %>% group_by(Country) %>% summarise(count = n())
global_1 <- left_join(global_1, country_coor3[, 2:4], by = c("Country" = "name"))
exit_1 <- exit3 %>% group_by(Country) %>% summarise(count = n())
exit_1 <- left_join(exit_1, country_coor3[, 2:4], by = c("Country" = "name"))
entry_1 <- entry3 %>% group_by(Country) %>% summarise(count = n())
entry_1 <- left_join(entry_1, country_coor3[, 2:4], by = c("Country" = "name"))
month=global3 %>% group_by(Month,movement_type) %>%
summarise(count=n())
month$Month<-as.character(month$Month)
# 创建净流入表并合并
month1<-data.frame(Month=c("2019-01-01","2019-02-01","2019-03-01","2019-04-01","2019-05-01","2019-06-01","2019-07-01","2019-08-01","2019-09-01"),
movement_type=c("netinflow"),
count=c("1","-4","5","2","4","11","5","14","8"))
month1$count<-as.integer(month1$count)
month<-rbind(month,month1)
# 创建图形
t4 <- ggplot(month, aes(x = as.Date(Month), y = count, color = movement_type)) +
geom_line() +
labs(title = "月变动情况") +
theme_minimal()
# 转换为可交互的图形
t4 <- ggplotly(t4)
# 打印图形
print(t4)
t4
```
## row {data-height=400}
### 各国人员流动堆积图
```{r}
Entry<-df_all%>% dplyr::filter(Type_of_movement == 'Entry')
Entry<-Entry%>%dplyr::count(Country)
#%>%complete(Country= unique(Entry$Country), fill = list(n = 0))
Entry<-arrange(Entry,Country)
unique(Entry$Country)%>% str_sub(1, 7) -> xlabel
Entry<-subset(Entry,select = c(n))
(Entry$n) -> Entry
Exit<-df_all%>% dplyr::filter(Type_of_movement == 'Exit')
Exit<-Exit%>%dplyr::count(Country)
Exit<-data.frame(
Country = c('Argentina','Australia',' Brazil',' Canada','India','Romania','USA'),
n = c(0,5,0,0,15,15,12))
#%>%complete(Country= unique(Exit$Country), fill = list(n = 0))
Exit<-subset(Exit,select = c(n))
(Exit$n) -> Exit
highchart() %>%
# hc_title(text = "各国人员流动堆积图", align="center") %>%
hc_xAxis(categories = xlabel) %>%
hc_plotOptions(column = list(
#将两组值合并在一起,默认值是TRUE
grouping = FALSE,
#设置图的阴影,默认值是FALSE
shadow = FALSE,
#合并后柱子的边界宽度
borderWidth = 0)) %>%
hc_add_series(
#名称
name = "新进",
data = Entry,
type = "column",
color = "#D87A80",
opacity = 0.8,
#合并后柱子的相对粗细,结合第二个“hc_add_series”中的该参数进行相对调整
pointPadding = 0.1,
#轴值与柱子中心线的相对位置
pointPlacement = 0) %>%
hc_add_series(
name = "离职",
data = Exit,
type = "column",
color = "#7e5686",
opacity = 0.9,
#设置透明度为0.9,可以稍微减少第一个柱子被遮挡的程度
# color = "rgba(165,170,217,0.9)",
pointPadding = 0.2,
pointPlacement = 0) %>%
hc_add_theme(hc_theme_flat(chart = list(
style = list(fontFamily = "Source Han Serif")
))) %>%
hc_credits(enabled = T)
```
### 各国 Work_Authorisation 情况
```{r}
country_wa<-df_all%>%group_by(Country,Work_Authorisation)%>%count(Work_Authorisation)%>%arrange(desc(n))
all_combinations <- expand.grid(
Country = unique(country_wa$Country),
Work_Authorisation = unique(country_wa$Work_Authorisation)
)
# 将组合与原始数据集进行合并
country_wa <- merge(all_combinations, country_wa , all = TRUE)
# 将缺失的 n 值填充为 0
country_wa$n[is.na(country_wa $n)] <- 0
# 打印结果
xl<-unique(country_wa$Country)
highchart() %>%
#hc_chart(zoomType = 'xy') %>%
hc_title(text = '各国 Work Authorisation 分布情况') %>%
hc_xAxis(categories = xl, crosshair = T) %>%
hc_yAxis_multiples(list(title = list(text = '各国Work Authorisation人数'),
opposite = FALSE)
) %>%
hc_colors(colors = c("#CC6699", "#FFCC66", "#99CC99", "#666699",'#D87A80','#660000','#3399CC')) %>%
hc_add_series(data = pull(subset(country_wa, Work_Authorisation == "Citizen" ), n),yAxis = 0, name = "Citizen", type = "column") %>%
hc_add_series(data = pull(subset(country_wa, Work_Authorisation == "Work Permit"), n),yAxis = 0, name = "Work Permit", type = "column") %>%
hc_add_series(data = pull(subset(country_wa, Work_Authorisation == "unknown"), n),yAxis = 0, name = "unknown", type = "column") %>%
hc_add_series(data = pull(subset(country_wa, Work_Authorisation == "Permanent Resident"), n),yAxis = 0, name = "Permanent Resident", type = "column") %>%
hc_plotOptions(column = list(stacking = 'normal'),
dataLabels = list(enabled = T,
style = list(textOutline = '1px 1px black'))) %>%
hc_add_theme(hc_theme_flat(chart = list(
style = list(fontFamily = "Source Han Serif")
))) %>%
hc_credits(enabled = T)
```
### 人员流动分布桑基图
```{r}
s1<-cbind(df_all[,c(16)],df_all[,c(8)],df_all[,c(11)])
s11<-s1%>% dplyr::filter(Type_of_movement == 'Entry') %>%dplyr::count(Org)%>%dplyr::mutate(Type_of_movement = 'Entry')
s12<-s1%>% dplyr::filter(Type_of_movement == 'Exit') %>%dplyr::count(Org)%>%dplyr::mutate(Type_of_movement = 'Exit')
s_1<-rbind(s11,s12)
s21<-s1%>% dplyr::filter(Type_of_movement == 'Entry') %>%dplyr::count(Country)%>%dplyr::mutate(Type_of_movement = 'Entry')
s22<-s1%>% dplyr::filter(Type_of_movement == 'Exit') %>%dplyr::count(Country)%>%dplyr::mutate(Type_of_movement = 'Exit')
s_2<-rbind(s21,s22)
s_2[, c("Country", "Type_of_movement")] <- s_2[, c("Type_of_movement", "Country")]
colnames(s_2) <- c('Org','n','Type_of_movement')
ss=rbind(s_1,s_2)
highchart() %>%
# hc_title(text = "人员流动分布桑基图") %>%
hc_add_series(data = ss,type = "sankey",hcaes(from = Type_of_movement,to = Org,weight = n)) %>%
hc_colors(colors = c("#CC6699", "#FFCC66", "#99CC99", "#666699",'#D87A80','#3399CC','#660000'))%>%
hc_add_theme(hc_theme_flat(chart = list(
style = list(fontFamily = "Source Han Serif")
))) %>%
hc_credits(enabled = T)
```
统计分析 {.storyboard}
=========================================
### Typeofmovement 卡方分析(相关性){data-height=400}
```{r}
qp<-list('Studio','area','Employment_type','Client','Country','Job_Title','Work_Authorisation','Org','Manager_EMP_ID')
results_df<-data.frame(Statistics = character(), `p_value` = numeric(), stringsAsFactors = FALSE)
for (varies in qp ){
var_table<-table(df_all[['Type_of_movement']], df_all[[varies]])
sq_results<-chisq.test(var_table)
new_row <- data.frame(Statistics = varies, `p_value` = sq_results[['p.value']], stringsAsFactors = FALSE)
results_df<-rbind(results_df,new_row)
}
results_df$p_value <- round(results_df$p_value, 5)
results_df<-results_df%>%arrange(p_value)
results_df%>%
reactable::reactable(pagination = TRUE,
compact = T,
striped = TRUE,highlight = TRUE,borderless = TRUE)%>%add_title("Typeofmovement 卡方分析")
```
***
* 卡方分析
* 在显著性水平α=0.05下
* "Work_Authorisation"、"Employment_type"和"Studio"变量的p值小于α
* 说明此三个变量与员工离入职情况之间存在显著关联,有可能是决定员工离职的重要因素,因此对此变量展开进一步研究
### 对日期进行参数检验
```{r}
ls_factor=list('Date_of_work','Date_of_Hire')
ut_results<-data.frame(Statistic=character(),U_=numeric(),T_=numeric())
for (i in ls_factor){
U_result<- wilcox.test(table(df_all[['Type_of_movement']], df_all[[i]]))
T_result<- t.test(table(df_all[['Type_of_movement']], df_all[[i]]))
new_row<-data.frame(Statistics = i, U_ = ifelse(format(U_result[['p.value']], scientific = FALSE)<=0.001,'***',format(U_result[['p.value']], scientific = FALSE)), T_ =ifelse(format(T_result[['p.value']], scientific = FALSE)<=0.001,'***',format(T_result[['p.value']], scientific = FALSE)))
ut_results<-rbind(ut_results,new_row)
}
ut_results%>%reactable::reactable(pagination = TRUE,
compact = T,
striped = TRUE,highlight = TRUE,borderless = TRUE)%>%add_title("参数检验")
```
***
- 在显著性水平α=0.01下
- "Date_of_work"、"Date_of_Hire"变量的p值小于α
- 说明就职时长与员工离入职情况之间存在显著关联,
### 离入职员工在职时间及天数统计
```{r}
df_date<-data.frame(Date_of_work_entry=c(97,223.7,363),Date_of_work_exit=c(95,1340,3072),Date_of_Hire_Entry=c('2019-01-02','2019-05-21','2019-09-25' ),Date_of_Hire_exit=c('2011-03-15','2015-07-29','2018-12-19'),Date_of_exit=c('2019-01-03','2019-03-30','2019-08-16'))
rownames(df_date)<-c('min','mean','max')
df_date%>%
reactable::reactable(pagination = TRUE,
compact = T,
striped = TRUE,highlight = TRUE,borderless = TRUE)%>%add_title("员工离入职时间")
```
***
离入职时间
- 可以发现最早入职的员工为2011年3月入职,最晚入职的员工为2019年9月入职
- 有47位员工离职,离职员工中最早离职时间为2019年1月3日,最晚离职时间为2019年8月16日,平均离职时间为2019年3月30日,最早入职时间为2011年3月15日,最晚入职时间为2018年12月19日,平均入职时间为2015年7月29日,截至2019年12月31日,在职时间最长为3072天,最短为95天,平均在职时长为1340天
- 有93为员工新入职,入职员工最早入职为2019年1月2日,最晚入职为2019年9月25日,截至2019年12月31日,在职时长最长为363天,最短为97天,平均在职时长为223天
- 截止2019年12月31日所有离在职员工平均入职时间为598天
### 逐步回归变量进行逻辑回归建模 {data-commentary-width=400}
```{r,warning=FALSE}
library(caret)
set.seed(2021212254)
df_all$Type_of_movement <- as.factor(df_all$Type_of_movement)
sample <- sample(c(TRUE, FALSE), nrow(df_all), replace=TRUE, prob=c(0.7,0.3))
train <- df_all[sample, ]
test <- df_all[!sample, ]
logic_model <- glm(Type_of_movement ~ Work_Authorisation + Employment_type + Studio + area + Country + Date_of_work + Client + Manager_EMP_ID + Org + Job_Title, data = train, family = "binomial")
model2<-step(object = logic_model,trace = 0)
# stargazer::stargazer(model2,type='text')
# library(pscl)
# anova(object = model2,test = "Chisq")
# pscl::pR2(model2)["McFadden"][1]
```
library(caret)
set.seed(2021212254)
df_all$Type_of_movement <- as.factor(df_all$Type_of_movement)
sample <- sample(c(TRUE, FALSE), nrow(df_all), replace=TRUE, prob=c(0.7,0.3))
train <- df_all[sample, ]
test <- df_all[!sample, ]
logic_model <- glm(Type_of_movement ~ Work_Authorisation + Employment_type + Studio + area + Country + Date_of_work + Client + Manager_EMP_ID + Org + Job_Title, data = train, family = "binomial")
model2<-step(object = logic_model,trace = 0)
***
---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
Dependent variable:
---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
Type_of_movement
---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
Work_AuthorisationPermanent Resident 3.864\*\*\*\
(1.491)
Work_Authorisationunknown 21.890\
(5,184.903)
Work_AuthorisationWork Permit -16.837\
(4,941.803)
Date_of_work 0.012\*\*\*\
(0.004)
Constant -6.444\*\*\*\
(1.631)
---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
Observations 94\
Log Likelihood -10.552\
Akaike Inf. Crit. 31.104\
---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
模型的 McFadden R方为 0.8249651
### AUC指标
```{r}
library(pROC)
prob<-predict(object =model2,newdata=test,type = "response")
pred<-ifelse(prob>=0.5,"Exit","Entry")
pred<-factor(pred,levels = c("Entry","Exit"),order=TRUE)
f<-table(test$Type_of_movement,pred)
roc_obj <- roc(test$Type_of_movement, as.numeric(pred))
# 绘制ROC曲线
plot(roc_obj,print.auc=TRUE,auc.polygon=TRUE,
grid=c(0.1,0.2),grid.col=c("green","red"),max.auc.polygon=TRUE,
auc.polygon.col="lightblue",print.thres="best")
plot(roc_obj, add=TRUE, col="red")
```
***
得到结论
- 变量均显著,模型通过检验且有效
- McFadden R方为0.8249,模型拟合性能尚可
- 通过系数大小可以发现,Work_Authorisation中的'work permit'降低员工离职的可能性,而Work_Authorisation中的Permanent Resident 、unknown 以及在职时长Date_of_work 均会增加员工离职可能,其中Work_Authorisation为unknown对员工离职影响大于Permanent Resident
- 且AUC值为0.88,较为合理,能够较为准确预测员工是否将会离职
# 新进员工分析 {data-icon="fa-user-plus"}
## row {data-height="300"}
### 新进员工雇佣类型分布
```{r}
entry <- df_all %>% dplyr::filter(Type_of_movement == 'Entry')
entry_movement <-entry %>%dplyr::count(Employment_type)
colnames(entry_movement) <- c('name','y')
highchart() %>%
hc_chart(type = "pie") %>%
hc_title(text = "新进员工雇佣类型分布") %>%
hc_add_series(data = entry_movement,
innerSize = '60%',
colorByPoint = TRUE,
colors=c('#D87A80','#807DBA' )) %>%
hc_tooltip(pointFormat = '{series.name}: <b>{point.percentage:.1f}%</b>') %>%
hc_title(
style = list(fontFamily = "SourceHanSerifSC-Medium")) %>%
hc_subtitle(style = list(fontFamily = "SourceHanSerifSC-Medium")) %>%
hc_add_theme(hc_theme_flat()) %>%
hc_plotOptions(pie = list(
allowPointSelect = TRUE,
cursor = 'pointer',
dataLabels = list(
enabled = T,
format = '<b>{point.name}</b>: {point.percentage:.1f} %'
),
showInLegend = T
))
```
### 不同国家的新进员工隶属单位分布玫瑰图
```{r}
rosedata1 <- entry %>% dplyr::filter(Country == "Romania") %>%
group_by(Org) %>%
dplyr::count()%>%dplyr::mutate(Country = "Romania")
rosedata2 <- entry %>% dplyr::filter(Country == "India") %>%
group_by(Org) %>%
dplyr::count()%>%dplyr::mutate(Country = "India")
rosedata3 <- entry %>% dplyr::filter(Country == "USA") %>%
group_by(Org) %>%
dplyr::count()%>%dplyr::mutate(Country = "USA")
rosedata4 <- entry %>% dplyr::filter(Country == "Australia") %>%
group_by(Org) %>%
dplyr::count()%>%dplyr::mutate(Country = "Australia")
rosedata5 <- entry %>% dplyr::filter(Country == "Argentina") %>%
group_by(Org) %>%
dplyr::count()%>%dplyr::mutate(Country = "Argentina")
rosedata6 <- entry %>% dplyr::filter(Country == "Canada") %>%
group_by(Org) %>%
dplyr::count()%>%dplyr::mutate(Country = "Canada")
rosedata7 <- entry %>% dplyr::filter(Country == "Brazil") %>%
group_by(Org) %>%
dplyr::count()%>%dplyr::mutate(Country = "Brazil")
rose_data<-rbind(rosedata1,rosedata2,rosedata3,rosedata4,rosedata5,rosedata6,rosedata7)
colnames(rose_data) <- c('variable','value','Country')
highchart() %>%
#polar设置极坐标
hc_chart(polar = TRUE,type = "column") %>%
hc_title(text = "不同国家的新进员工隶属单位分布",x=-50) %>%
#设置图形大小
hc_pane(size = "85%") %>%
#设置图例位置
hc_legend(align = "right",verticalAlign = "top",y = 100,layout = "vertical") %>%
hc_yAxis(title = list(text = "占比 (%)",
#设置轴标题的位置
x= 0,
y=-30),
#颠倒堆积的顺序
reversedStacks = FALSE) %>%
hc_xAxis(categories = unique(rose_data$Country))%>%
hc_tooltip(
#提示框的值显示百分比
valueSuffix = "%") %>%
hc_plotOptions(series = list(stacking = "normal",pointPlacement= "on",groupPadding = 0,shadow = FALSE))%>%
hc_colors( colors=c('#3399CC','#D87A80','#807DBA','#663366' ))%>%
hc_add_series(data=rose_data,type = "column",hcaes(y=value,group = variable)) %>%
hc_add_theme(hc_theme_flat(chart = list(
style = list(fontFamily = "Source Han Serif")
)))
```
### 新进员工的工作授权分布
```{r}
entry_Work_Authorisation <-entry %>%dplyr::count(Work_Authorisation)
colnames(entry_Work_Authorisation) <- c('name','y')
highchart() %>%
hc_chart(type = "pie") %>%
hc_title(text = "新进员工的工作授权分布") %>%
hc_add_series(data = entry_Work_Authorisation,
innerSize = '60%',
colorByPoint = TRUE,
colors=c('#807DBA','#3399CC','#D87A80' )) %>%
hc_tooltip(pointFormat = '{series.name}: <b>{point.percentage:.1f}%</b>') %>%
hc_title(
style = list(fontFamily = "SourceHanSerifSC-Medium")) %>%
hc_subtitle(style = list(fontFamily = "SourceHanSerifSC-Medium")) %>%
hc_add_theme(hc_theme_flat()) %>%
hc_plotOptions(pie = list(
allowPointSelect = TRUE,
cursor = 'pointer',
dataLabels = list(
enabled = T,
format = '<b>{point.name}</b>: {point.percentage:.1f} %'
),
showInLegend = T
))
```
## row {data-height="500"}
### 新进员工雇佣时间分布
```{r}
entry1 <- entry %>%
tidyr::separate( col = Date_of_Hire,
into = c('year','month','day'),
sep = '-'
)
month_entry<-entry %>% dplyr::mutate( year = lubridate::year(Date_of_Hire),
month = lubridate::month(Date_of_Hire)
) %>%
group_by(year,month) %>%
dplyr::count()
month_entry%>%
pull(month) -> monthlist
month_entry %>%
dplyr::filter(month %in% monthlist) %>%
hchart(type = "line", hcaes(x = month, y = n, group = year)) %>%
hc_title(text = "新进员工雇佣时间分布") %>%
hc_add_theme(hc_theme_flat(chart = list(
style = list(fontFamily = "Source Han Serif")
))) %>%
hc_colors(colors = c('#D87A80'))%>%
hc_credits(enabled = T) %>%
hc_legend(align = 'left',
floating = TRUE,
layout = 'vertical',
x = 80, y = -100)
```
### 每月总体以及各国新进人数分布
```{r}
entry_1 <- entry1 %>% dplyr::filter(Country == "Argentina") %>%
group_by(month) %>%
dplyr::count()%>%dplyr::mutate(Country = "Argentina")
entry_2 <- entry1 %>% dplyr::filter(Country == "Australia") %>%
group_by(month) %>%
dplyr::count()%>%dplyr::mutate(Country = "Australia")
entry_3 <- entry1 %>% dplyr::filter(Country == "Brazil") %>%
group_by(month) %>%
dplyr::count()%>%dplyr::mutate(Country = "Brazil")
entry_4 <- entry1 %>% dplyr::filter(Country == "Canada") %>%
group_by(month) %>%
dplyr::count()%>%dplyr::mutate(Country = "Canada")
entry_5 <- entry1 %>% dplyr::filter(Country == "India") %>%
group_by(month) %>%
dplyr::count()%>%dplyr::mutate(Country = "India")
entry_6 <- entry1 %>% dplyr::filter(Country == "Romania") %>%
group_by(month) %>%
dplyr::count()%>%dplyr::mutate(Country = "Romania")
entry_7 <- entry1 %>% dplyr::filter(Country == "USA") %>%
group_by(month) %>%
dplyr::count()%>%dplyr::mutate(Country = "USA")
#entry_data<-rbind(entry_1,entry_2,entry_3,entry_4,entry_5,entry_6,entry_7)
#colnames(entry_data) <- c('month','value','Country')
entry_1<-data.frame(
month=c('1','2','3','4','5','6','7','8','9'),
Country = c('Argentina','Argentina',' Argentina',' Argentina','Argentina','Argentina','Argentina','Argentina','Argentina'),
n = c(1,1,0,2,0,1,0,1,1))
entry_2<-data.frame(
month=c('1','2','3','4','5','6','7','8','9'),
Country = c('Australia','Australia',' Australia',' Australia','Australia','Australia','Australia','Australia','Australia'),
n = c(2,1,0,0,2,1,1,3,0))
entry_3<-data.frame(
month=c('1','2','3','4','5','6','7','8','9'),
Country = c('Brazil','Brazil',' Brazil',' Brazil','Brazil','Brazil','Brazil','Brazil','Brazil'),
n = c(0,0,0,0,1,0,0,0,0))
entry_4<-data.frame(
month=c('1','2','3','4','5','6','7','8','9'),
Country = c('Canada','Canada',' Canada',' Canada','Canada','Canada','Canada','Canada','Canada'),
n = c(1,0,1,1,1,0,0,0,0))
entry_5<-data.frame(
month=c('1','2','3','4','5','6','7','8','9'),
Country = c('India','India',' India',' India','India','India','India','India','India'),
n = c(4,1,0,3,2,4,1,5,0))
entry_6<-data.frame(
month=c('1','2','3','4','5','6','7','8','9'),
Country = c('Romania','Romania','Romania','Romania','Romania','Romania','Romania','Romania','Romania'),
n = c(3,1,6,2,2,5,6,6,4))
entry_7<-data.frame(
month=c('1','2','3','4','5','6','7','8','9'),
Country = c('USA','USA',' USA',' USA','USA','USA','USA','USA','USA'),
n = c(1,1,3,1,1,4,1,1,3))
colnames(entry_1) <- c('month','Country','value')
colnames(entry_2) <- c('month','Country','value')
colnames(entry_3) <- c('month','Country','value')
colnames(entry_4) <- c('month','Country','value')
colnames(entry_5) <- c('month','Country','value')
colnames(entry_6) <- c('month','Country','value')
colnames(entry_7) <- c('month','Country','value')
total_month_entry<-entry1%>%group_by(month) %>%
dplyr::count()
unique(entry_7$month) %>%
str_sub(1, 7) -> xlabel
highchart() %>%
#hc_chart(zoomType = 'xy') %>%
hc_title(text = '每月总体以及各国新进人数分布') %>%
hc_xAxis(categories = xlabel, crosshair = T) %>%
hc_yAxis_multiples(list(title = list(text = '各国新进人数'),
opposite = FALSE)
,list(title = list(text = '总体新进人数',
style = list(color = "black")),
labels = list(style = list(color = "black")),
opposite = TRUE)
) %>%
hc_colors(colors = c("#CC6699", "#FFCC66", "#99CC99", "#666699",'#D87A80','#660000','#3399CC')) %>%
hc_add_series(data = pull(subset(entry_1, Country = "Argentina"), value),yAxis = 0, name = "Argentina", type = "column") %>%
hc_add_series(data = pull(subset(entry_2, Country = "Australia"), value),yAxis = 0, name = "Australia", type = "column") %>%
hc_add_series(data = pull(subset(entry_3, Country = "Brazil"), value),yAxis = 0, name = "Brazil", type = "column") %>%
hc_add_series(data = pull(subset(entry_4, Country = "Canada"), value),yAxis = 0, name = "Canada", type = "column") %>%
hc_add_series(data = pull(subset(entry_5, Country = "India"), value), yAxis = 0, name = "India", type = "column") %>%
hc_add_series(data = pull(subset(entry_6, Country = "Romania"), value),yAxis = 0, name = "Romania", type = "column") %>%
hc_add_series(data = pull(subset(entry_7, Country = "USA"), value), yAxis = 0, name = "USA", type = "column") %>%
hc_add_series(data = total_month_entry$n,
yAxis = 1, name = "新进人数", type = "spline",color='#996699') %>%
hc_plotOptions(column = list(stacking = 'normal'),
dataLabels = list(enabled = T,
style = list(textOutline = '1px 1px black'))) %>%
hc_add_theme(hc_theme_flat(chart = list(
style = list(fontFamily = "Source Han Serif")
))) %>%
hc_credits(enabled = T)
```
## Row {data-height="400"}
### 新进人员的负责客户分布
```{r}
entry_client<-entry%>%group_by(Client) %>%
dplyr::count()
#entry_studio<-entry%>%group_by(Studio) %>%
# dplyr::count()
entry_client <- entry_client[order(-entry_client$n),]
top7_entry_client <- entry_client[1:7,]
other_n_sum <- sum(entry_client[-(1:7), "n"])
entry_client <- data.frame(Client = "其它", n = other_n_sum)
entry_client <- rbind(top7_entry_client, entry_client)
highchart() %>%
hc_chart(type = "bar") %>%
hc_title(text = "新进人员的负责客户分布") %>%
hc_colors(colors = c('#D87A80')) %>%
hc_xAxis(categories = entry_client$Client) %>%
hc_yAxis(title = list(text = "人数")) %>%
hc_add_series(name = "Client", data = entry_client$n) %>%
hc_plotOptions(column = list(stacking = 'normal'),
dataLabels = list(enabled = T,
style = list(textOutline = '1px 1px black'))) %>%
hc_add_theme(hc_theme_flat(chart = list(
style = list(fontFamily = "Source Han Serif")
))) %>%
hc_tooltip(
shared = TRUE,
pointFormat = "<b>{point.y}</b>"
)
```
### 新进人员数量最多的八个工作室
```{r}
entry_studio<-entry%>%group_by(Studio) %>%
dplyr::count()
entry_studio <- entry_studio[order(-entry_studio$n),]
entry_studio <- entry_studio[1:8,]
#other_n_sum <- sum(entry_studio[-(1:7), "n"])
#entry_studio <- data.frame(Client = "其它", n = other_n_sum)
#entry_studio <- rbind(top7_entry_studio, entry_studio)
highchart() %>%
hc_chart(type = "bar") %>%
hc_title(text = "新进人员数量最多的八个工作室") %>%
hc_colors(colors = c('#666699')) %>%
hc_xAxis(categories = entry_studio$Studio) %>%
hc_yAxis(title = list(text = "人数")) %>%
hc_add_series(name = "Studio", data = entry_studio$n) %>%
hc_plotOptions(column = list(stacking = 'normal'),
dataLabels = list(enabled = T,
style = list(textOutline = '1px 1px black'))) %>%
hc_add_theme(hc_theme_flat(chart = list(
style = list(fontFamily = "Source Han Serif")
))) %>%
hc_tooltip(
shared = TRUE,
pointFormat = "<b>{point.y}</b>"
)
```
# 离职员工分析{data-icon="fa-user-minus"}
## row {data-height="300"}
### 离职员工雇佣类型分布 {data-width=400}
```{r}
exit <- df_all %>% dplyr::filter(Type_of_movement == 'Exit')
exit$Date_of_Hire <- as.Date(exit$Date_of_Hire)
exit_movement <-exit %>%dplyr::count(Employment_type)
colnames(exit_movement) <- c('name','y')
highchart() %>%
hc_chart(type = "pie") %>%
hc_title(text = "离职员工雇佣类型分布") %>%
hc_add_series(data = exit_movement,
innerSize = '60%',
colorByPoint = TRUE,
colors=c('#D87A80','#807DBA' )) %>%
hc_tooltip(pointFormat = '{series.name}: <b>{point.percentage:.1f}%</b>') %>%
hc_title(
style = list(fontFamily = "SourceHanSerifSC-Medium")) %>%
hc_subtitle(style = list(fontFamily = "SourceHanSerifSC-Medium")) %>%
hc_add_theme(hc_theme_flat()) %>%
hc_plotOptions(pie = list(
allowPointSelect = TRUE,
cursor = 'pointer',
dataLabels = list(
enabled = T,
format = '<b>{point.name}</b>: {point.percentage:.1f} %'
),
showInLegend = T
))
```
### 离职员工隶属单位分布
```{r}
exit_org <-exit %>%dplyr::count(Org)
colnames(exit_org) <- c('name','y')
highchart() %>%
hc_chart(type = "pie") %>%
hc_title(text = "离职员工隶属单位分布") %>%
hc_add_series(data = exit_org,
innerSize = '60%',
colorByPoint = TRUE,
colors=c('#3399CC','#D87A80','#663366','#807DBA')) %>%
hc_tooltip(pointFormat = '{series.name}: <b>{point.percentage:.1f}%</b>') %>%
hc_title(
style = list(fontFamily = "SourceHanSerifSC-Medium")) %>%
hc_subtitle(style = list(fontFamily = "SourceHanSerifSC-Medium")) %>%
hc_add_theme(hc_theme_flat()) %>%
hc_plotOptions(pie = list(
allowPointSelect = TRUE,
cursor = 'pointer',
dataLabels = list(
enabled = T,
format = '<b>{point.name}</b>: {point.percentage:.1f} %'
),
showInLegend = T
))
```
### 离职员工的工作授权分布
```{r}
exit_Work_Authorisation <-exit %>%dplyr::count(Work_Authorisation)
colnames(exit_Work_Authorisation) <- c('name','y')
highchart() %>%
hc_chart(type = "pie") %>%
hc_title(text = "离职员工的工作授权分布") %>%
hc_add_series(data = exit_Work_Authorisation,
innerSize = '60%',
colorByPoint = TRUE,
colors=c('#D87A80','#3399CC','#807DBA' )) %>%
hc_tooltip(pointFormat = '{series.name}: <b>{point.percentage:.1f}%</b>') %>%
hc_title(
style = list(fontFamily = "SourceHanSerifSC-Medium")) %>%
hc_subtitle(style = list(fontFamily = "SourceHanSerifSC-Medium")) %>%
hc_add_theme(hc_theme_flat()) %>%
hc_plotOptions(pie = list(
allowPointSelect = TRUE,
cursor = 'pointer',
dataLabels = list(
enabled = T,
format = '<b>{point.name}</b>: {point.percentage:.1f} %'
),
showInLegend = T
))
```
## row {data-height=500}
### 离职员工总在职日期分布
```{r}
exit$Date_of_Exit <- as.Date(exit$Date_of_Exit)
exit$Date_of_Hire <- as.Date(exit$Date_of_Hire)
exit$days_diff <- as.numeric(exit$Date_of_Exit - exit$Date_of_Hire)
days<-exit$days_diff
cut_days <- cut(days, breaks = seq(0, ceiling(max(days)/500)*500, by = 500),
labels = c("0-500","500-1000","1000-1500","1500-2000","2000-2500","2500-3000","3000-3500")
)
# Print the result
cut_days <- as.data.frame(cut_days)
cut_days<-cut_days %>%group_by(cut_days) %>%
dplyr::count()
highchart() %>%
hc_chart(type = "bar") %>%
hc_title(text = "离职员工总在职日期分布") %>%
hc_colors(colors = c('#D87A80')) %>%
hc_xAxis(categories = cut_days$cut_days) %>%
hc_yAxis(title = list(text = "人数")) %>%
hc_add_series(name = "days", data = cut_days$n) %>%
hc_plotOptions(column = list(stacking = 'normal'),
dataLabels = list(enabled = T,
style = list(textOutline = '1px 1px black'))) %>%
hc_add_theme(hc_theme_flat(chart = list(
style = list(fontFamily = "Source Han Serif")
))) %>%
hc_tooltip(
shared = TRUE,
pointFormat = "<b>{point.y}</b>"
)
```
### 离职人员的负责客户分布
```{r}
exit_client<-exit%>%group_by(Client) %>%
dplyr::count()
#entry_studio<-entry%>%group_by(Studio) %>%
# dplyr::count()
exit_client <- exit_client[order(-exit_client$n),]
top7_exit_client <- exit_client[1:7,]
other_n_sum <- sum(exit_client[-(1:7), "n"])
exit_client <- data.frame(Client = "其它", n = other_n_sum)
exit_client <- rbind(top7_exit_client, exit_client)
highchart() %>%
hc_chart(type = "bar") %>%
hc_title(text = "离职人员的负责客户分布") %>%
hc_colors(colors = c('#D87A80')) %>%
hc_xAxis(categories = exit_client$Client) %>%
hc_yAxis(title = list(text = "人数")) %>%
hc_add_series(name = "Client", data = exit_client$n) %>%
hc_plotOptions(column = list(stacking = 'normal'),
dataLabels = list(enabled = T,
style = list(textOutline = '1px 1px black'))) %>%
hc_add_theme(hc_theme_flat(chart = list(
style = list(fontFamily = "Source Han Serif")
))) %>%
hc_tooltip(
shared = TRUE,
pointFormat = "<b>{point.y}</b>"
)
```
## row {data-height=500}
### 整体以及不同国家离职员工人数关于离职月份的分布
```{r}
exit2 <- exit %>%
tidyr::separate( col = Date_of_Exit,
into = c('year','month','day'),
sep = '-'
)
df <- data.frame(month = character(8))
df[, 1] <- c('01','02','03','04','05','06','07','08')
df$month<-as.character(df$month)
df1<-df%>%dplyr::mutate(n=0)%>%dplyr::mutate(Country = "Australia")
exit_1 <- exit2 %>% dplyr::filter(Country == "Australia") %>%
group_by(month) %>%
dplyr::count()%>%dplyr::mutate(Country = "Australia")
exit_1<-rbind(exit_1,df1)
exit_1$n<-as.numeric(exit_1$n)
exit_1<-exit_1 %>% group_by(month) %>%summarise(n=sum(n))%>%dplyr::mutate(Country = "Australia")
df2<-df%>%dplyr::mutate(n=0)%>%dplyr::mutate(Country = "India")
exit_2 <- exit2 %>% dplyr::filter(Country == "India") %>%
group_by(month) %>%
dplyr::count()%>%dplyr::mutate(Country = "India")
exit_2<-rbind(exit_2,df2)
exit_2$n<-as.numeric(exit_2$n)
exit_2<-exit_2 %>% group_by(month) %>%summarise(n=sum(n))%>%dplyr::mutate(Country = "India")
df3<-df%>%dplyr::mutate(n=0)%>%dplyr::mutate(Country = "Romania")
exit_3 <- exit2 %>% dplyr::filter(Country == "Romania") %>%
group_by(month) %>%
dplyr::count()%>%dplyr::mutate(Country = "Romania")
exit_3<-rbind(exit_3,df3)
exit_3$n<-as.numeric(exit_3$n)
exit_3<-exit_3 %>% group_by(month) %>%summarise(n=sum(n))%>%dplyr::mutate(Country = "Romania")
df4<-df%>%dplyr::mutate(n=0)%>%dplyr::mutate(Country = "USA")
exit_4 <- exit2 %>% dplyr::filter(Country == "USA") %>%
group_by(month) %>%
dplyr::count()%>%dplyr::mutate(Country = "USA")
exit_4<-rbind(exit_4,df4)
exit_4$n<-as.numeric(exit_4$n)
exit_4<-exit_4 %>% group_by(month) %>%summarise(n=sum(n))%>%dplyr::mutate(Country = "USA")
colnames(exit_1) <- c('month','value','Country')
colnames(exit_2) <- c('month','value','Country')
colnames(exit_3) <- c('month','value','Country')
colnames(exit_4) <- c('month','value','Country')
total_month_exit<-exit2%>%group_by(month) %>%
dplyr::count()
unique(exit_1$month) %>%
str_sub(1, 4) -> xlabel
highchart() %>%
hc_xAxis(categories = xlabel, crosshair = T) %>%
hc_yAxis_multiples(list(title = list(text = '各国离职人数'),
opposite = FALSE)
,list(title = list(text = '总体离职人数',
style = list(color = "black")),
labels = list(style = list(color = "black")),
opposite = TRUE)
) %>%
# hc_title(text = "整体以及不同国家离职员工人数关于离职月份的分布") %>%
hc_colors(colors = c("#CC6699", "#FFCC66", "#D87A80", "#666699",'#D87A80','#660000','#3399CC')) %>%
hc_add_series(data = pull(subset(exit_1, Country = "Australia"), value),yAxis = 0, name = "Australia", type = "column") %>%
hc_add_series(data = pull(subset(exit_2, Country = "India"), value),yAxis = 0, name = "India", type = "column") %>%
hc_add_series(data = pull(subset(exit_3, Country = "Romania"), value),yAxis = 0, name = "Romania", type = "column") %>%
hc_add_series(data = pull(subset(exit_4, Country = "USA"), value),yAxis = 0, name = "USA", type = "column") %>%
hc_add_series(data = total_month_exit$n,
yAxis = 1, name = "离职人数", type = "spline",color='#996699') %>%
hc_plotOptions(column = list(stacking = 'normal'),
dataLabels = list(enabled = T,
style = list(textOutline = '1px 1px black'))) %>%
hc_add_theme(hc_theme_flat(chart = list(
style = list(fontFamily = "Source Han Serif")
))) %>%
hc_credits(enabled = T)
```
### 整体以及不同国家离职员工人数关于入职年份的分布
```{r}
exit1 <- exit %>%
tidyr::separate( col = Date_of_Hire,
into = c('year','month','day'),
sep = '-'
)
df <- data.frame(year = character(8))
df[, 1] <- 2011:2018
df$year<-as.character(df$year)
df1<-df%>%dplyr::mutate(n=0)%>%dplyr::mutate(Country = "Australia")
exit_1 <- exit1 %>% dplyr::filter(Country == "Australia") %>%
group_by(year) %>%
dplyr::count()%>%dplyr::mutate(Country = "Australia")
exit_1<-rbind(exit_1,df1)
exit_1$n<-as.numeric(exit_1$n)
exit_1<-exit_1 %>% group_by(year) %>%summarise(n=sum(n))%>%dplyr::mutate(Country = "Australia")
df2<-df%>%dplyr::mutate(n=0)%>%dplyr::mutate(Country = "India")
exit_2 <- exit1 %>% dplyr::filter(Country == "India") %>%
group_by(year) %>%
dplyr::count()%>%dplyr::mutate(Country = "India")
exit_2<-rbind(exit_2,df2)
exit_2$n<-as.numeric(exit_2$n)
exit_2<-exit_2 %>% group_by(year) %>%summarise(n=sum(n))%>%dplyr::mutate(Country = "India")
df3<-df%>%dplyr::mutate(n=0)%>%dplyr::mutate(Country = "Romania")
exit_3 <- exit1 %>% dplyr::filter(Country == "Romania") %>%
group_by(year) %>%
dplyr::count()%>%dplyr::mutate(Country = "Romania")
exit_3<-rbind(exit_3,df3)
exit_3$n<-as.numeric(exit_3$n)
exit_3<-exit_3 %>% group_by(year) %>%summarise(n=sum(n))%>%dplyr::mutate(Country = "Romania")
df4<-df%>%dplyr::mutate(n=0)%>%dplyr::mutate(Country = "USA")
exit_4 <- exit1 %>% dplyr::filter(Country == "USA") %>%
group_by(year) %>%
dplyr::count()%>%dplyr::mutate(Country = "USA")
exit_4<-rbind(exit_4,df4)
exit_4$n<-as.numeric(exit_4$n)
exit_4<-exit_4 %>% group_by(year) %>%summarise(n=sum(n))%>%dplyr::mutate(Country = "USA")
colnames(exit_1) <- c('year','value','Country')
colnames(exit_2) <- c('year','value','Country')
colnames(exit_3) <- c('year','value','Country')
colnames(exit_4) <- c('year','value','Country')
total_year_exit<-exit1%>%group_by(year) %>%
dplyr::count()
unique(exit_1$year) %>%
str_sub(1, 4) -> xlabel
highchart() %>%
hc_title(text = "整体以及不同国家离职员工人数关于入职年份的分布") %>%
hc_xAxis(categories = xlabel, crosshair = T) %>%
hc_yAxis_multiples(list(title = list(text = '各国离职人数'),
opposite = FALSE)
,list(title = list(text = '总体离职人数',
style = list(color = "black")),
labels = list(style = list(color = "black")),
opposite = TRUE)
) %>%
hc_colors(colors = c("#CC6699", "#FFCC66", "#D87A80", "#666699",'#D87A80','#660000','#3399CC')) %>%
hc_add_series(data = pull(subset(exit_1, Country = "Australia"), value),yAxis = 0, name = "Australia", type = "column") %>%
hc_add_series(data = pull(subset(exit_2, Country = "India"), value),yAxis = 0, name = "India", type = "column") %>%
hc_add_series(data = pull(subset(exit_3, Country = "Romania"), value),yAxis = 0, name = "Romania", type = "column") %>%
hc_add_series(data = pull(subset(exit_4, Country = "USA"), value),yAxis = 0, name = "USA", type = "column") %>%
hc_add_series(data = total_year_exit$n,
yAxis = 1, name = "离职人数", type = "spline",color='#996699') %>%
hc_plotOptions(column = list(stacking = 'normal'),
dataLabels = list(enabled = T,
style = list(textOutline = '1px 1px black'))) %>%
hc_add_theme(hc_theme_flat(chart = list(
style = list(fontFamily = "Source Han Serif")
))) %>%
hc_credits(enabled = T)
```
## row {data-height=400}
### 离职员工离职类型分布
```{r}
exit_type <- exit %>%dplyr::count(Attrition_Type)
colnames(exit_type) <- c('name','y')
highchart() %>%
hc_chart(type = "pie") %>%
hc_add_series(data = exit_type,
colorByPoint = TRUE,
colors=c('#807DBA','#D87A80' ),
innerSize = '50%') %>%
hc_tooltip(pointFormat = '{series.name}: <b>{point.percentage:.1f}%</b>') %>%
hc_title(text = '离职员工离职类型分布',
style = list(fontFamily = "SourceHanSerifSC-Medium")) %>%
hc_add_theme(hc_theme_flat()) %>%
hc_plotOptions(pie = list(
allowPointSelect = TRUE,
cursor = 'pointer',
dataLabels = list(
enabled = T,
format = '<b>{point.name}</b>: {point.percentage:.1f} %'
),
showInLegend = T,
startAngle = -90, # 圆环的开始角度
endAngle = 90, # 圆环的结束角度
center = c('50%', '75%')
))
```
### 离职员工离职原因分布
```{r}
exit_reason <- exit %>%dplyr::count(Reason_for_Leaving)
colnames(exit_reason) <- c('name','y')
exit_reason <- exit_reason %>% arrange(desc(y))
highchart() %>%
hc_chart(type = "pie") %>%
hc_title(text = "离职员工离职原因分布") %>%
hc_add_series(data = exit_reason,
innerSize = '60%',
colorByPoint = TRUE,
colors=c('#D87A80',"#666699", "#FFCC66", "#99CC99","#CC6699",'#3399CC' )) %>%
hc_tooltip(pointFormat = '{series.name}: <b>{point.percentage:.1f}%</b>') %>%
hc_title(
style = list(fontFamily = "SourceHanSerifSC-Medium")) %>%
hc_subtitle(style = list(fontFamily = "SourceHanSerifSC-Medium")) %>%
hc_add_theme(hc_theme_flat()) %>%
hc_plotOptions(pie = list(
allowPointSelect = TRUE,
cursor = 'pointer',
dataLabels = list(
enabled = T,
format = '<b>{point.name}</b>: {point.percentage:.1f} %'
),
showInLegend = T
))
```
### 离职人员数量最多的八个工作室
```{r}
exit_studio<-exit%>%group_by(Studio) %>%
dplyr::count()
exit_studio <- exit_studio[order(-exit_studio$n),]
exit_studio <- exit_studio[1:8,]
#other_n_sum <- sum(entry_studio[-(1:7), "n"])
#entry_studio <- data.frame(Client = "其它", n = other_n_sum)
#entry_studio <- rbind(top7_entry_studio, entry_studio)
highchart() %>%
hc_chart(type = "bar") %>%
hc_title(text = "离职人员数量最多的八个工作室") %>%
hc_colors(colors = c('#666699')) %>%
hc_xAxis(categories = exit_studio$Studio) %>%
hc_yAxis(title = list(text = "人数")) %>%
hc_add_series(name = "Studio", data = exit_studio$n) %>%
hc_plotOptions(column = list(stacking = 'normal'),
dataLabels = list(enabled = T,
style = list(textOutline = '1px 1px black'))) %>%
hc_add_theme(hc_theme_flat(chart = list(
style = list(fontFamily = "Source Han Serif")
))) %>%
hc_tooltip(
shared = TRUE,
pointFormat = "<b>{point.y}</b>"
)
```
# 趋势预测及结论建议
## Column
### 时间序列HoltWinters预测——2019年入职情况
```{r}
library(forecast)
df_time<-df_all%>%mutate(month_h=month(Date_of_Hire),month_e=month(Date_of_Exit),year_e=year(Date_of_Exit),year_h=year(Date_of_Hire))%>%dplyr::filter(year_e==2019|year_h==2019)
hr_date<-df_time%>%dplyr::filter(year_h==2019)%>%group_by(month_h)%>%count()
ex_date<-df_time%>%dplyr::filter(year_e==2019)%>%group_by(month_e)%>%count()
hr <- ts(hr_date[, 2], start = c(2019, 1), end = c(2019, 9), frequency = 12)
ex <- ts(ex_date[, 2], start = c(2019, 1), end = c(2019, 8), frequency = 12)
fit1 <- HoltWinters(hr, gamma = FALSE)
fit2 <- HoltWinters(ex, gamma = FALSE)
forecast_hr <- forecast(fit1, h = 3)
forecast_ex <- forecast(fit2, h = 4)
plot(forecast_hr, col='#FF000088', lwd=4,ylim=c(-3,20),xaxt='n',ylab='入职人数',main='2019年入职情况HoltWinters预测')
```
### 时间序列HoltWinters预测——2019年离职情况
```{r}
plot(forecast_ex, col='#4040ff88', lwd=4,ylim=c(-3,20),xaxt='n',ylab='离职人数',main='2019年离职情况HoltWinters预测')
```
### 时间序列HoltWinters预测——2019年情况汇总
```{r}
forecast_hr$upper<-forecast_hr$upper/1000
forecast_ex$upper<-forecast_ex$upper/1000
forecast_hr$lower<-forecast_hr$lower/1000
forecast_ex$lower<-forecast_ex$lower/1000
plot(forecast_hr, col='#FF000088', lwd=4,ylim=c(-3,20),xaxt='n',yaxt='n',main='2019年离入职情况HoltWinters预测')
par(new = TRUE)
plot(forecast_ex,ylab='离入职人数', col='#4040ff88', lwd=4,ylim=c(-3,20),xaxt='n',alpha=0.5,,main='')
legend("topleft", c("入职", "离职"), col = c('red','blue'), lty = 1,lwd=1)
```
## Row
### 各领导下属离职情况
```{r}
library(tidyr)
Entry<-df_all%>% dplyr::filter(Type_of_movement == 'Entry')%>%dplyr::count(Manager_Name)
colnames(Entry)<-c('Manager_Name','en_n')
en_ex<-df_all%>%dplyr::filter(Type_of_movement == 'Exit')%>%dplyr::count(Manager_Name)%>%full_join(Entry)%>%mutate(rate=n/en_n)%>%arrange(desc(rate))
colnames(en_ex)<-c('Manager_Name','ex_n','en_n')
en_ex[is.na(en_ex)] = 0
highchart() %>%
# hc_title(text = "各国人员流动堆积图", align="center") %>%
hc_xAxis(categories = en_ex$Manager_Name ) %>%
hc_plotOptions(column = list(
#将两组值合并在一起,默认值是TRUE
grouping = FALSE,
#设置图的阴影,默认值是FALSE
shadow = FALSE,
#合并后柱子的边界宽度
borderWidth = 0)) %>%
hc_add_series(
#名称
name = "新进",
data = en_ex$en_n,
type = "column",
color = "#D87A80",
opacity = 0.8,
pointPadding = 0.1,
pointPlacement = 0) %>%
hc_add_series(
name = "离职",
data = en_ex$ex_n,
type = "column",
color = "#7e5686",
opacity = 0.9,
pointPadding = 0.2,
pointPlacement = 0) %>%
hc_add_theme(hc_theme_flat(chart = list(
style = list(fontFamily = "Source Han Serif")
))) %>%
hc_title(text = "不同领导下属离职情况",align="center")
```
### 各国 Work_Authorisation 情况
```{r}
country_wa<-df_all%>%group_by(Country,Work_Authorisation)%>%count(Work_Authorisation)%>%arrange(desc(n))
all_combinations <- expand.grid(
Country = unique(country_wa$Country),
Work_Authorisation = unique(country_wa$Work_Authorisation)
)
# 将组合与原始数据集进行合并
country_wa <- merge(all_combinations, country_wa , all = TRUE)
# 将缺失的 n 值填充为 0
country_wa$n[is.na(country_wa $n)] <- 0
# 打印结果
xl<-unique(country_wa$Country)
highchart() %>%
#hc_chart(zoomType = 'xy') %>%
hc_title(text = '各国 Work Authorisation 分布情况') %>%
hc_xAxis(categories = xl, crosshair = T) %>%
hc_yAxis_multiples(list(title = list(text = '各国Work Authorisation人数'),
opposite = FALSE)
) %>%
hc_colors(colors = c("#CC6699", "#FFCC66", "#99CC99", "#666699",'#D87A80','#660000','#3399CC')) %>%
hc_add_series(data = pull(subset(country_wa, Work_Authorisation == "Citizen" ), n),yAxis = 0, name = "Citizen", type = "column") %>%
hc_add_series(data = pull(subset(country_wa, Work_Authorisation == "Work Permit"), n),yAxis = 0, name = "Work Permit", type = "column") %>%
hc_add_series(data = pull(subset(country_wa, Work_Authorisation == "unknown"), n),yAxis = 0, name = "unknown", type = "column") %>%
hc_add_series(data = pull(subset(country_wa, Work_Authorisation == "Permanent Resident"), n),yAxis = 0, name = "Permanent Resident", type = "column") %>%
hc_plotOptions(column = list(stacking = 'normal'),
dataLabels = list(enabled = T,
style = list(textOutline = '1px 1px black'))) %>%
hc_add_theme(hc_theme_flat(chart = list(
style = list(fontFamily = "Source Han Serif")
))) %>%
hc_credits(enabled = T)
```